from pyspark import SparkContext, SparkConf
from pyspark.sql import SQLContext

# 1. Initialize Spark: build a local-mode configuration and create the context.
conf = SparkConf().setAppName('sparkdemo').setMaster('local')
sc = SparkContext(conf=conf)

# 2. Create an RDD from an in-memory Python list.
data = [1, 2, 3, 4, 5]
# 2.1 Parallelize the list into a distributed dataset (RDD of ints).
distData = sc.parallelize(data)


# 3. Transform: pair each number n with a string made of n copies of "a",
#    e.g. 3 -> (3, "aaa").
rdd = distData.map(lambda n: (n, n * "a"))

# 4. Persist the result: write each (n, "aa...") tuple as one line of text
#    under the given output directory.
# NOTE(review): Spark typically refuses to write into an existing output
# directory — confirm the path is deleted between runs.
rdd.saveAsTextFile("C:\\Users\\user\\Desktop\\pysparkfile")


# 5. Release resources: shut down the SparkContext.
# Bug fix: the original `sc.stop` merely referenced the bound method without
# calling it, so the context was never actually stopped. The parentheses
# invoke the shutdown.
sc.stop()




# Launch this Spark job from the command line:
# python e:/workspace/vsworkspace/pyspark_learning/sparkdemo.py